In [4]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

import pandas as pd

from iso3166 import countries

import plotly.express as px
In [6]:
uri = "mongodb://localhost:27017/?retryWrites=true&w=majority"
client = MongoClient(uri, server_api=ServerApi('1'))
database = client["OpenAlexEnvironmental"]
collection = database["journals"]

print(f"Total number of Articles: {collection.count_documents({})}")
Total number of Articles: 0
In [7]:
uri = "mongodb://localhost:27017/?retryWrites=true&w=majority"
client = MongoClient(uri, server_api=ServerApi('1'))
database = client["test_db"]
collection = database["journals"]

print(f"Total number of Articles: {collection.count_documents({})}")
Total number of Articles: 1039248
In [8]:
collection.find_one()
Out[8]:
{'_id': ObjectId('64d2f6d8adb21da659ac9df5'),
 'title': 'Triclosan exposure, transformation, and human health effects',
 'language': 'en',
 'publication_year': 2017,
 'publication_date': '2017-11-17',
 'type': 'article',
 'primary_location': {'is_oa': False,
  'landing_page_url': 'https://doi.org/10.1080/10937404.2017.1399306',
  'pdf_url': None,
  'source': {'id': 'https://openalex.org/S96923654',
   'display_name': 'Journal of Toxicology and Environmental Health-part B-critical Reviews',
   'issn_l': '1093-7404',
   'issn': ['1521-6950', '1093-7404'],
   'is_oa': False,
   'is_in_doaj': False,
   'host_organization': 'https://openalex.org/P4310320547',
   'host_organization_name': 'Taylor & Francis',
   'host_organization_lineage': ['https://openalex.org/P4310320547'],
   'host_organization_lineage_names': ['Taylor & Francis'],
   'type': 'journal'},
  'license': None,
  'version': None},
 'authorships': [{'author_position': 'first',
   'author': {'id': 'https://openalex.org/A5003146474',
    'display_name': 'Lisa M. Weatherly',
    'orcid': None},
   'institutions': [{'id': 'https://openalex.org/I7947594',
     'display_name': 'University of Maine',
     'ror': 'https://ror.org/01adr0w49',
     'country_code': 'US',
     'type': 'education'}],
   'countries': ['US'],
   'is_corresponding': False,
   'raw_affiliation_string': 'a Graduate School of Biomedical Science and Engineering , University of Maine , Orono , ME , USA.',
   'raw_affiliation_strings': ['a Graduate School of Biomedical Science and Engineering , University of Maine , Orono , ME , USA.']},
  {'author_position': 'last',
   'author': {'id': 'https://openalex.org/A5012277599',
    'display_name': 'Julie A. Gosse',
    'orcid': 'https://orcid.org/0000-0002-2457-4039'},
   'institutions': [{'id': 'https://openalex.org/I7947594',
     'display_name': 'University of Maine',
     'ror': 'https://ror.org/01adr0w49',
     'country_code': 'US',
     'type': 'education'}],
   'countries': ['US'],
   'is_corresponding': False,
   'raw_affiliation_string': 'b Department of Molecular and Biomedical Sciences , University of Maine , Orono , ME , USA.',
   'raw_affiliation_strings': ['b Department of Molecular and Biomedical Sciences , University of Maine , Orono , ME , USA.']}],
 'biblio': {'volume': '20',
  'issue': '8',
  'first_page': '447',
  'last_page': '469'},
 'concepts': [{'id': 'https://openalex.org/C2781289450',
   'wikidata': 'https://www.wikidata.org/wiki/Q408646',
   'display_name': 'Triclosan',
   'level': 2,
   'score': 0.9709624},
  {'id': 'https://openalex.org/C3018890749',
   'wikidata': 'https://www.wikidata.org/wiki/Q204711',
   'display_name': 'Food and drug administration',
   'level': 2,
   'score': 0.68914425},
  {'id': 'https://openalex.org/C143432726',
   'wikidata': 'https://www.wikidata.org/wiki/Q520181',
   'display_name': 'Hand sanitizer',
   'level': 2,
   'score': 0.63388824},
  {'id': 'https://openalex.org/C2776866151',
   'wikidata': 'https://www.wikidata.org/wiki/Q35855',
   'display_name': 'Toothpaste',
   'level': 2,
   'score': 0.5975416},
  {'id': 'https://openalex.org/C2987857752',
   'wikidata': 'https://www.wikidata.org/wiki/Q12147',
   'display_name': 'Human health',
   'level': 2,
   'score': 0.5535524},
  {'id': 'https://openalex.org/C71924100',
   'wikidata': 'https://www.wikidata.org/wiki/Q11190',
   'display_name': 'Medicine',
   'level': 0,
   'score': 0.47784802},
  {'id': 'https://openalex.org/C2908647359',
   'wikidata': 'https://www.wikidata.org/wiki/Q2625603',
   'display_name': 'Population',
   'level': 2,
   'score': 0.4776762},
  {'id': 'https://openalex.org/C2780035454',
   'wikidata': 'https://www.wikidata.org/wiki/Q8386',
   'display_name': 'Drug',
   'level': 2,
   'score': 0.46354717},
  {'id': 'https://openalex.org/C98274493',
   'wikidata': 'https://www.wikidata.org/wiki/Q128406',
   'display_name': 'Pharmacology',
   'level': 1,
   'score': 0.3782525},
  {'id': 'https://openalex.org/C99454951',
   'wikidata': 'https://www.wikidata.org/wiki/Q932068',
   'display_name': 'Environmental health',
   'level': 1,
   'score': 0.3534659},
  {'id': 'https://openalex.org/C199343813',
   'wikidata': 'https://www.wikidata.org/wiki/Q12128',
   'display_name': 'Dentistry',
   'level': 1,
   'score': 0.16849786},
  {'id': 'https://openalex.org/C142724271',
   'wikidata': 'https://www.wikidata.org/wiki/Q7208',
   'display_name': 'Pathology',
   'level': 1,
   'score': 0.12375158}],
 'abstract_inverted_index': {'Triclosan': [0],
  '(TCS)': [1],
  'is': [2, 14, 70],
  'an': [3],
  'antimicrobial': [4],
  'used': [5],
  'so': [6],
  'ubiquitously': [7],
  'that': [8],
  '75%': [9],
  'of': [10, 89],
  'the': [11, 37, 41],
  'US': [12, 42],
  'population': [13],
  'likely': [15],
  'exposed': [16],
  'to': [17, 93],
  'this': [18, 90],
  'compound': [19],
  'via': [20],
  'consumer': [21],
  'goods': [22],
  'and': [23, 44, 66, 76, 79, 85, 98, 104],
  'personal': [24, 57],
  'care': [25, 58],
  'products.': [26],
  'In': [27],
  'September': [28],
  '2016,': [29],
  'TCS': [30, 49, 69, 95, 116],
  'was': [31, 92, 122],
  'banned': [32],
  'from': [33],
  'soap': [34],
  'products': [35, 59],
  'following': [36],
  'risk': [38],
  'assessment': [39],
  'by': [40],
  'Food': [43],
  'Drug': [45],
  'Administration': [46],
  '(FDA).': [47],
  'However,': [48],
  'still': [50],
  'remains,': [51],
  'at': [52],
  'high': [53],
  'concentrations,': [54],
  'in': [55, 81],
  'other': [56],
  'such': [60, 118],
  'as': [61, 100, 102, 119],
  'toothpaste,': [62],
  'mouthwash,': [63],
  'hand': [64],
  'sanitizer,': [65],
  'surgical': [67],
  'soaps.': [68],
  'readily': [71],
  'absorbed': [72],
  'into': [73],
  'human': [74, 83, 111],
  'skin': [75],
  'oral': [77],
  'mucosa': [78],
  'found': [80],
  'various': [82],
  'tissues': [84],
  'fluids.': [86],
  'The': [87, 107],
  'aim': [88],
  'review': [91],
  'describe': [94],
  'exposure': [96],
  'routes': [97],
  'levels': [99],
  'well': [101],
  'metabolism': [103],
  'transformation': [105],
  'processes.': [106],
  'burgeoning': [108],
  'literature': [109],
  'on': [110],
  'health': [112],
  'effects': [113],
  'associated': [114],
  'with': [115],
  'exposure,': [117],
  'reproductive': [120],
  'problems,': [121],
  'also': [123],
  'summarized.': [124]},
 'cited_by_count': 308,
 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W2768246307'}
In [9]:
publications_data_list=[]
c=0
for x in collection.find():
    if(len(x['authorships'])>0):
        for aut in x['authorships']:
            if(aut['author_position']=='first' and len(aut['institutions'])>0 and ('country_code' in aut['institutions'][0].keys())):
                first_author_ctrycode=aut['institutions'][0]['country_code']
    if(len(x['authorships'])>0):
        for aut in x['authorships']:
            if(aut['author_position']=='last' and len(aut['institutions'])>0 and ('country_code' in aut['institutions'][0].keys())):
                last_author_ctrycode=aut['institutions'][0]['country_code']
    if(c%10000==0):
        print(c, end="--")
    c=c+1
    publications_data_list.append((x['_id'],x['publication_year'],x['type'],first_author_ctrycode, last_author_ctrycode,x['cited_by_count'])) 
                
     
0--10000--20000--30000--40000--50000--60000--70000--80000--90000--100000--110000--120000--130000--140000--150000--160000--170000--180000--190000--200000--210000--220000--230000--240000--250000--260000--270000--280000--290000--300000--310000--320000--330000--340000--350000--360000--370000--380000--390000--400000--410000--420000--430000--440000--450000--460000--470000--480000--490000--500000--510000--520000--530000--540000--550000--560000--570000--580000--590000--600000--610000--620000--630000--640000--650000--660000--670000--680000--690000--700000--710000--720000--730000--740000--750000--760000--770000--780000--790000--800000--810000--820000--830000--840000--850000--860000--870000--880000--890000--900000--910000--920000--930000--940000--950000--960000--970000--980000--990000--1000000--1010000--1020000--1030000--
In [10]:
publications_data_list[0]
Out[10]:
(ObjectId('64d2f6d8adb21da659ac9df5'), 2017, 'article', 'US', 'US', 308)
In [11]:
publications_df = pd.DataFrame(publications_data_list, columns =['id', 'year', 'type','first_aut_cc','last_aut_cc','total_citations'])
publications_df.head()
Out[11]:
id year type first_aut_cc last_aut_cc total_citations
0 64d2f6d8adb21da659ac9df5 2017 article US US 308
1 64d2f6d8adb21da659ac9df6 2018 article DK DK 191
2 64d2f6d8adb21da659ac9df7 2013 article DK DK 174
3 64d2f6d8adb21da659ac9df8 2014 article GB GB 148
4 64d2f6d8adb21da659ac9df9 2013 article US US 139
In [12]:
firstauthor_pub_cnt=publications_df.loc[:, ['first_aut_cc','id']].groupby('first_aut_cc').count()
lastauthor_pub_cnt=publications_df.loc[:, ['last_aut_cc','id']].groupby('last_aut_cc').count()
In [13]:
firstauthor_pub_citations=publications_df.loc[:, ['first_aut_cc','total_citations']].groupby('first_aut_cc').sum()
lastauthor_pub_citations=publications_df.loc[:, ['last_aut_cc','total_citations']].groupby('last_aut_cc').sum()
In [14]:
firstauthor_pub_cnt.shape
Out[14]:
(198, 1)
In [15]:
firstauthor_pub_cnt.head()
Out[15]:
id
first_aut_cc
AD 2
AE 1078
AF 28
AL 91
AM 120
In [16]:
lastauthor_pub_cnt.shape
Out[16]:
(205, 1)
In [17]:
firstauthor_pub_cnt=firstauthor_pub_cnt.merge(firstauthor_pub_citations,on='first_aut_cc')
lastauthor_pub_cnt=lastauthor_pub_cnt.merge(lastauthor_pub_citations,on='last_aut_cc')
In [18]:
first_aut_ctry = []
last_aut_ctry=[]
first_aut_cc3 = []
last_aut_cc3=[]
c=0
for idx in firstauthor_pub_cnt.index:
    if(idx is not None):
        first_aut_ctry.append(countries.get(idx).name)
        #print(countries.get(idx).name)
        first_aut_cc3.append(countries.get(idx).alpha3)
    else:
        first_aut_ctry.append(None)
        first_aut_cc3.append(None)

for idx in lastauthor_pub_cnt.index:
    if(idx is not None):
        last_aut_ctry.append(countries.get(idx).name)
        last_aut_cc3.append(countries.get(idx).alpha3)
    else:
        last_aut_ctry.append(None)
        last_aut_cc3.append(None)
    
In [19]:
firstauthor_pub_cnt['Country']=first_aut_ctry
firstauthor_pub_cnt['Country Code']=first_aut_cc3
lastauthor_pub_cnt['Country']=last_aut_ctry
lastauthor_pub_cnt['Country Code']=last_aut_cc3
In [20]:
lastauthor_pub_cnt=lastauthor_pub_cnt.rename(columns={'id':'Articles Count'})
lastauthor_pub_cnt.head()
Out[20]:
Articles Count total_citations Country Country Code
last_aut_cc
AD 7 75 Andorra AND
AE 1132 20848 United Arab Emirates ARE
AF 27 233 Afghanistan AFG
AL 61 435 Albania ALB
AM 107 868 Armenia ARM
In [21]:
firstauthor_pub_cnt=firstauthor_pub_cnt.rename(columns={'id':'Articles Count'})
firstauthor_pub_cnt.head()
Out[21]:
Articles Count total_citations Country Country Code
first_aut_cc
AD 2 89 Andorra AND
AE 1078 20707 United Arab Emirates ARE
AF 28 167 Afghanistan AFG
AL 91 1268 Albania ALB
AM 120 830 Armenia ARM
In [22]:
income_level_data=pd.read_excel('world bank income division.xlsx')
income_level_data2=pd.read_excel('world bank income2.xlsx')
gdp_per_capita=pd.read_excel('world bank GDP data.xls')
gdp_data=pd.read_excel('Total GDP.xls')
pop_data=pd.read_excel('Pop_data.xls')
In [23]:
income_level_data=income_level_data.drop(columns={'Income Group Code','Income Group'})
income_level_data=income_level_data.drop_duplicates()
In [24]:
income_level_data.head()
Out[24]:
Country Code Country
0 ASM American Samoa
1 AND Andorra
2 ATG Antigua and Barbuda
3 ABW Aruba
4 AUS Australia
In [25]:
income_level_data2.head()
Out[25]:
Economy Income group
0 Aruba High income
1 Afghanistan Low income
2 Angola Lower middle income
3 Albania Upper middle income
4 Andorra High income
In [26]:
income_level_data2.shape
Out[26]:
(218, 2)
In [27]:
income_level_data.shape
Out[27]:
(217, 2)
In [28]:
income_level_data=income_level_data.merge(income_level_data2, right_on='Economy', left_on='Country')
income_level_data=income_level_data.drop(columns={'Economy','Country'})
income_level_data.head()
Out[28]:
Country Code Income group
0 ASM High income
1 AND High income
2 ATG High income
3 ABW High income
4 AUS High income
In [29]:
gdp_per_capita=gdp_per_capita.drop(columns={'Indicator Name', 'Country Name'})
gdp_per_capita=gdp_per_capita.rename(columns={'2022':'GDP per capita(US$)'})
gdp_per_capita=gdp_per_capita.drop_duplicates()
gdp_per_capita.head()
Out[29]:
Country Code GDP per capita(US$)
0 ABW 29342.100730
1 AFE 1622.391720
2 AFG 363.674087
3 AFW 1790.348800
4 AGO 2998.501158
In [30]:
gdp_data=gdp_data.drop(columns={'Indicator Name', 'Country Name'})
gdp_data=gdp_data.rename(columns={'2022': 'GDP Total(US$)'})
gdp_data=gdp_data.drop_duplicates()
gdp_data.head()
Out[30]:
Country Code GDP Total(US$) Unnamed: 4 Unnamed: 5 Unnamed: 6
0 ABW 3.126019e+09 NaN NaN NaN
1 AFE 1.169484e+12 NaN NaN NaN
2 AFG 1.458314e+10 NaN NaN NaN
3 AFW 8.778633e+11 NaN NaN NaN
4 AGO 1.067136e+11 NaN NaN NaN
In [31]:
pop_data=pop_data.drop(columns={'Indicator Name', 'Country Name'})
pop_data=pop_data.rename(columns={'2022': 'Population'})
pop_data=pop_data.drop_duplicates()
#pop_data.Population=pop_data.Population.astype(int)
pop_data.head()
Out[31]:
Country Code Population
0 ABW 106445.0
1 AFE 720839314.0
2 AFG 41128771.0
3 AFW 490330870.0
4 AGO 35588987.0
In [32]:
first_author_data=firstauthor_pub_cnt.merge(income_level_data, on='Country Code',how='left')
first_author_data=first_author_data.merge(gdp_data, on='Country Code', how='left')
first_author_data=first_author_data.merge(gdp_per_capita, on='Country Code', how='left')
first_author_data=first_author_data.merge(pop_data,on='Country Code', how='left')
first_author_data.head()
Out[32]:
Articles Count total_citations Country Country Code Income group GDP Total(US$) Unnamed: 4 Unnamed: 5 Unnamed: 6 GDP per capita(US$) Population
0 2 89 Andorra AND High income 3.352033e+09 NaN NaN NaN 41992.793358 79824.0
1 1078 20707 United Arab Emirates ARE High income 5.075349e+11 NaN NaN NaN 53757.863251 9441129.0
2 28 167 Afghanistan AFG Low income 1.458314e+10 NaN NaN NaN 363.674087 41128771.0
3 91 1268 Albania ALB Upper middle income 1.888210e+10 NaN NaN NaN 6802.804519 2775634.0
4 120 830 Armenia ARM Upper middle income 1.950278e+10 NaN NaN NaN 7014.206592 2780469.0
In [33]:
last_author_data=lastauthor_pub_cnt.merge(income_level_data, on='Country Code',how='left')
last_author_data=last_author_data.merge(gdp_data, on='Country Code', how='left')
last_author_data=last_author_data.merge(gdp_per_capita, on='Country Code', how='left')
last_author_data=last_author_data.merge(pop_data,on='Country Code', how='left')
last_author_data.head()
Out[33]:
Articles Count total_citations Country Country Code Income group GDP Total(US$) Unnamed: 4 Unnamed: 5 Unnamed: 6 GDP per capita(US$) Population
0 7 75 Andorra AND High income 3.352033e+09 NaN NaN NaN 41992.793358 79824.0
1 1132 20848 United Arab Emirates ARE High income 5.075349e+11 NaN NaN NaN 53757.863251 9441129.0
2 27 233 Afghanistan AFG Low income 1.458314e+10 NaN NaN NaN 363.674087 41128771.0
3 61 435 Albania ALB Upper middle income 1.888210e+10 NaN NaN NaN 6802.804519 2775634.0
4 107 868 Armenia ARM Upper middle income 1.950278e+10 NaN NaN NaN 7014.206592 2780469.0
In [34]:
first_Income_group_3levels=[]
for income in first_author_data['Income group']:
    if(income=='High income'):
        first_Income_group_3levels.append('High income')
    elif(income=='Upper middle income' or income=='Lower middle income'):
        first_Income_group_3levels.append('Middle income')
    elif(income=='Low income'):
        first_Income_group_3levels.append('Low income')
    else:
        first_Income_group_3levels.append('None')

last_Income_group_3levels=[]
for income in last_author_data['Income group']:
    if(income=='High income'):
        last_Income_group_3levels.append('High income')
    elif(income=='Upper middle income' or income=='Lower middle income'):
        last_Income_group_3levels.append('Middle income')
    elif(income=='Low income'):
        last_Income_group_3levels.append('Low income')
    else:
        last_Income_group_3levels.append('None')


first_author_data['Income_group_3levels']=first_Income_group_3levels
last_author_data['Income_group_3levels']=last_Income_group_3levels
In [35]:
first_author_data=first_author_data.sort_values(by=['Articles Count'], ascending=False)
last_author_data=last_author_data.sort_values(by=['Articles Count'], ascending=False)
In [36]:
first_author_data.head()
Out[36]:
Articles Count total_citations Country Country Code Income group GDP Total(US$) Unnamed: 4 Unnamed: 5 Unnamed: 6 GDP per capita(US$) Population Income_group_3levels
36 280766 5206537 China CHN Upper middle income 1.796317e+13 NaN NaN NaN 12720.215640 1.412175e+09 Middle income
185 128822 2904817 United States of America USA High income 2.546270e+13 NaN NaN NaN 76398.591742 3.332876e+08 High income
80 40010 704320 India IND Lower middle income 3.385090e+12 NaN NaN NaN 2388.621198 1.417173e+09 Middle income
61 31336 856868 United Kingdom of Great Britain and Northern I... GBR High income 3.070668e+12 NaN NaN NaN 45850.426122 6.697141e+07 High income
53 31162 634026 Spain ESP High income 1.397509e+12 NaN NaN NaN 29350.168521 4.761503e+07 High income
In [37]:
last_author_data.head()
Out[37]:
Articles Count total_citations Country Country Code Income group GDP Total(US$) Unnamed: 4 Unnamed: 5 Unnamed: 6 GDP per capita(US$) Population Income_group_3levels
36 265616 4830412 China CHN Upper middle income 1.796317e+13 NaN NaN NaN 12720.215640 1.412175e+09 Middle income
191 134193 3154211 United States of America USA High income 2.546270e+13 NaN NaN NaN 76398.591742 3.332876e+08 High income
83 39164 671137 India IND Lower middle income 3.385090e+12 NaN NaN NaN 2388.621198 1.417173e+09 Middle income
62 33098 906292 United Kingdom of Great Britain and Northern I... GBR High income 3.070668e+12 NaN NaN NaN 45850.426122 6.697141e+07 High income
53 33064 641088 Spain ESP High income 1.397509e+12 NaN NaN NaN 29350.168521 4.761503e+07 High income
In [38]:
print(first_author_data.shape)
print(last_author_data.shape)
first_author_data.to_excel("First_author_ctry_corresponding_data.xlsx")
last_author_data.to_excel("Last_author_ctry_corresponding_data.xlsx")
(198, 12)
(205, 12)
In [39]:
first_author_data=pd.read_excel('First_author_ctry_corresponding_data.xlsx')
last_author_data=pd.read_excel('Last_author_ctry_corresponding_data.xlsx')
In [40]:
print("First author country Null Income levels", first_author_data['Income group'].isna().sum())
print("First author country Null GDP per capita", first_author_data['GDP per capita(US$)'].isna().sum())
print("First author country Null GDP Total",first_author_data['GDP Total(US$)'].isna().sum())
print("First author country Null Population",first_author_data['Population'].isna().sum())
print("Last author country Null Income levels", last_author_data['Income group'].isna().sum())
print("Last author country Null GDP per capita", last_author_data['GDP per capita(US$)'].isna().sum())
print("Last author country Null GDP Total", last_author_data['GDP Total(US$)'].isna().sum())
print("Last author country Null Population", last_author_data['Population'].isna().sum())
First author country Null Income levels 8
First author country Null GDP per capita 11
First author country Null GDP Total 10
First author country Null Population 8
Last author country Null Income levels 9
Last author country Null GDP per capita 13
Last author country Null GDP Total 12
Last author country Null Population 9
In [41]:
first_author_ctry_data=first_author_data.dropna(axis=0, subset=['Income group', 'GDP per capita(US$)','GDP Total(US$)','Population'])
last_author_ctry_data=last_author_data.dropna(axis=0, subset=['Income group','GDP per capita(US$)','GDP Total(US$)','Population'])
In [42]:
print(first_author_ctry_data.shape)
print(last_author_ctry_data.shape)
(186, 13)
(191, 13)
In [43]:
first_author_ctry_data=first_author_ctry_data.loc[:,['Articles Count','total_citations','Country','Country Code','Income group','GDP Total(US$)','GDP per capita(US$)','Population','Income_group_3levels']]
last_author_ctry_data=last_author_ctry_data.loc[:,['Articles Count','total_citations','Country','Country Code','Income group','GDP Total(US$)','GDP per capita(US$)','Population','Income_group_3levels']]
In [44]:
first_author_ctry_data['citations_multiple']=first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations']
last_author_ctry_data['citations_multiple']=last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations']
In [45]:
first_author_ctry_data['normalized_citations']=(first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations'])/(first_author_ctry_data['Population'])
last_author_ctry_data['normalized_citations']=(last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations'])/(last_author_ctry_data['Population'])
In [55]:
first_author_ctry_data['normalized_citations1']=(first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations'])/(first_author_ctry_data['GDP Total(US$)'])
last_author_ctry_data['normalized_citations1']=(last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations'])/(last_author_ctry_data['GDP Total(US$)'])
In [58]:
first_author_ctry_data['normalized_citations2']=(first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations'])/(first_author_ctry_data['GDP per capita(US$)'])
last_author_ctry_data['normalized_citations2']=(last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations'])/(last_author_ctry_data['GDP per capita(US$)'])
In [46]:
first_author_ctry_data['normalised_cit']=(first_author_ctry_data['total_citations']/first_author_ctry_data['Articles Count'])
last_author_ctry_data['normalised_cit']=(last_author_ctry_data['total_citations']/last_author_ctry_data['Articles Count'])
In [61]:
first_author_ctry_data['normalised_cit2']=((first_author_ctry_data['total_citations']*first_author_ctry_data['GDP per capita(US$)'])/first_author_ctry_data['Articles Count'])
last_author_ctry_data['normalised_cit2']=((last_author_ctry_data['total_citations']*first_author_ctry_data['GDP per capita(US$)'])/last_author_ctry_data['Articles Count'])
In [47]:
fig = px.scatter(first_author_ctry_data, x="GDP Total(US$)", y='citations_multiple', color='Income group',hover_data=['Country'],
                 log_y=True,log_x=True, 
                 labels={"y": "Log scale of (Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
               title='First Author Publications count normalized by citations vs GDP with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
In [56]:
fig = px.scatter(first_author_ctry_data, x="GDP per capita(US$)", y='normalized_citations', color='Income group',hover_data=['Country'],
                 log_y=True,log_x=True, 
                 labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
               title='First Author Publications count normalized by citations  and population vs GDP with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
In [65]:
fig = px.scatter(first_author_ctry_data, x="GDP per capita(US$)", y='normalised_cit2', color='Income group',hover_data=['Country'],
                 log_y=True,log_x=True, 
                 labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
               title='citations *GDP per capita/First Author Publications count normalized vs GDP per capita with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
In [49]:
fig = px.scatter(first_author_ctry_data, x="GDP per capita(US$)", y='normalised_cit', color='Income group',hover_data=['Country'],
                 log_x=True, 
                 labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
               title='First Author Publications count normalized by citations  and population vs GDP with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
In [50]:
fig = px.scatter(first_author_ctry_data, x="GDP Total(US$)", y='normalised_cit', color='Income group',hover_data=['Country'],
                 log_x=True, 
                 labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
               title='First Author Publications count normalized by citations  and population vs GDP with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
In [51]:
first_author_ctry_data['normalized_citations']=(first_author_ctry_data['Articles Count']*first_author_ctry_data['total_citations'])/(first_author_ctry_data['Population']*first_author_ctry_data['GDP Total(US$)'])
last_author_ctry_data['normalized_citations']=(last_author_ctry_data['Articles Count']*last_author_ctry_data['total_citations'])/((last_author_ctry_data['Population']*last_author_ctry_data['GDP Total(US$)']))
In [52]:
fig = px.scatter(first_author_ctry_data, y="normalized_citations", x='GDP per capita(US$)', color='Income group',hover_data=['Country'],
                 log_x=True, log_y=True,
                 labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
               title='First Author Publications count normalized by citations  and population vs GDP with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
In [53]:
fig = px.scatter(first_author_ctry_data, y="normalized_citations", x='normalized_citations', color='Income group',hover_data=['Country'],
                 log_x=True, log_y=True,
                 labels={"y": "Log scale of ( normalized Articles Count)", "GDP Total(US$)": "Log scale of GDP"},
               title='First Author Publications count normalized by citations  and population vs GDP with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1000, height=800)
fig.show()
In [54]:
##(articles count*citations)/population

fig=px.bar(first_author_ctry_data, x='Country', y='normalized_citations', color='Income group', log_y=True, color_discrete_map={
                'High income': 'blue',
                'Upper middle income' : 'green',
                'Lower middle income': 'yellow',
                'Low income' : 'red'},
            labels={"Articles Count": "Log of Articles Count"},
           title='First Author Publications count multiplied by citations  and divided by  population with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1500, height=800, text_auto=True)
fig.update_layout(xaxis_categoryorder = 'total descending')
fig.update_layout(
xaxis = dict(
tickfont = dict(size=5)))
fig.update_traces(cliponaxis=False)
'''
fig.update_layout(color_discrete_map={
                'High income': 'blue',
                'Upper middle income' : 'green',
                'Lower middle income': 'yellow',
                'Low Income' : 'red'})
'''
#fig.write_html("/Users/prathimagodha/Desktop/plot1.html")
fig.show()
In [57]:
##(articles count*citations)/gdp

fig=px.bar(first_author_ctry_data, x='Country', y='normalized_citations1', color='Income group', log_y=True, color_discrete_map={
                'High income': 'blue',
                'Upper middle income' : 'green',
                'Lower middle income': 'yellow',
                'Low income' : 'red'},
            labels={"Articles Count": "Log of Articles Count"},
           title='First Author Publications count multiplied by citations  and divided by  GDP with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1500, height=800, text_auto=True)
fig.update_layout(xaxis_categoryorder = 'total descending')
fig.update_layout(
xaxis = dict(
tickfont = dict(size=5)))
fig.update_traces(cliponaxis=False)
'''
fig.update_layout(color_discrete_map={
                'High income': 'blue',
                'Upper middle income' : 'green',
                'Lower middle income': 'yellow',
                'Low Income' : 'red'})
'''
#fig.write_html("/Users/prathimagodha/Desktop/plot1.html")
fig.show()
In [59]:
#(articles count*citations)/gdp per capita

fig=px.bar(first_author_ctry_data, x='Country', y='normalized_citations2', color='Income group', log_y=True, color_discrete_map={
                'High income': 'blue',
                'Upper middle income' : 'green',
                'Lower middle income': 'yellow',
                'Low income' : 'red'},
            labels={"Articles Count": "Log of Articles Count"},
           title='First Author Publications count multiplied by citations  and divided by GDP per capita with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1500, height=800, text_auto=True)
fig.update_layout(xaxis_categoryorder = 'total descending')
fig.update_layout(
xaxis = dict(
tickfont = dict(size=5)))
fig.update_traces(cliponaxis=False)
'''
fig.update_layout(color_discrete_map={
                'High income': 'blue',
                'Upper middle income' : 'green',
                'Lower middle income': 'yellow',
                'Low Income' : 'red'})
'''
#fig.write_html("/Users/prathimagodha/Desktop/plot1.html")
fig.show()
In [64]:
#(articles citations*gdp per capita)/ articles count

fig=px.bar(first_author_ctry_data, x='Country', y='normalised_cit2', color='Income group', log_y=True, color_discrete_map={
                'High income': 'blue',
                'Upper middle income' : 'green',
                'Lower middle income': 'yellow',
                'Low income' : 'red'},
            labels={"Articles Count": "Log of Articles Count"},
           title='First Author Publications count multiplied by citations  and divided by GDP per capita with Corresponding Income level',
               category_orders={'Income group':['High income', 'Upper middle income', 'Lower middle income','Low income']}, width=1500, height=800, text_auto=True)
fig.update_layout(xaxis_categoryorder = 'total descending')
fig.update_layout(
xaxis = dict(
tickfont = dict(size=5)))
fig.update_traces(cliponaxis=False)
'''
fig.update_layout(color_discrete_map={
                'High income': 'blue',
                'Upper middle income' : 'green',
                'Lower middle income': 'yellow',
                'Low Income' : 'red'})
'''
#fig.write_html("/Users/prathimagodha/Desktop/plot1.html")
fig.show()
In [ ]: